{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Ejercicio Regresión Logística\n", "\n", "Crear un clasificador basado en el algoritmo de regresión logística para predecir si el valor de la vivienda supera la media\n", "\n", "entrada: housing.csv\n", "\n", "Procedimiento:\n", "- Cargar los datos en un DataFrame y explorar brevemente\n", "- Eliminar las observaciones que tengan algún dato faltante\n", "- Eliminar las observaciones con el valor atípico (máximo) para la variable 'median_house_value'\n", "- Aplicar one hot encoding a la variable 'ocean_proximity'\n", "- Crear una nueva variable booleana 'above_median'\n", "- Aplicar los pasos train-test-split para poder entrenar y evaluar el clasificador\n", "\n", "¿Cuáles son los valores de accuracy, matriz de confusión, precision, recall y F1 del clasificador?" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import numpy as np\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.datasets import load_boston" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "total_bedrooms | \n", "population | \n", "households | \n", "median_income | \n", "expected_house_value | \n", "ocean_proximity | \n", "
---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "-122.23 | \n", "37.88 | \n", "41.0 | \n", "880.0 | \n", "129.0 | \n", "322.0 | \n", "126.0 | \n", "8.3252 | \n", "452600.0 | \n", "NEAR BAY | \n", "
1 | \n", "-122.22 | \n", "37.86 | \n", "21.0 | \n", "7099.0 | \n", "1106.0 | \n", "2401.0 | \n", "1138.0 | \n", "8.3014 | \n", "358500.0 | \n", "NEAR BAY | \n", "
2 | \n", "-122.24 | \n", "37.85 | \n", "52.0 | \n", "1467.0 | \n", "190.0 | \n", "496.0 | \n", "177.0 | \n", "7.2574 | \n", "352100.0 | \n", "NEAR BAY | \n", "
3 | \n", "-122.25 | \n", "37.85 | \n", "52.0 | \n", "1274.0 | \n", "235.0 | \n", "558.0 | \n", "219.0 | \n", "5.6431 | \n", "341300.0 | \n", "NEAR BAY | \n", "
4 | \n", "-122.25 | \n", "37.85 | \n", "52.0 | \n", "1627.0 | \n", "280.0 | \n", "565.0 | \n", "259.0 | \n", "3.8462 | \n", "342200.0 | \n", "NEAR BAY | \n", "
\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "total_bedrooms | \n", "population | \n", "households | \n", "median_income | \n", "expected_house_value | \n", "ocean_proximity_<1H OCEAN | \n", "ocean_proximity_INLAND | \n", "ocean_proximity_ISLAND | \n", "ocean_proximity_NEAR BAY | \n", "ocean_proximity_NEAR OCEAN | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "-122.23 | \n", "37.88 | \n", "41.0 | \n", "880.0 | \n", "129.0 | \n", "322.0 | \n", "126.0 | \n", "8.3252 | \n", "452600.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
1 | \n", "-122.22 | \n", "37.86 | \n", "21.0 | \n", "7099.0 | \n", "1106.0 | \n", "2401.0 | \n", "1138.0 | \n", "8.3014 | \n", "358500.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
2 | \n", "-122.24 | \n", "37.85 | \n", "52.0 | \n", "1467.0 | \n", "190.0 | \n", "496.0 | \n", "177.0 | \n", "7.2574 | \n", "352100.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
3 | \n", "-122.25 | \n", "37.85 | \n", "52.0 | \n", "1274.0 | \n", "235.0 | \n", "558.0 | \n", "219.0 | \n", "5.6431 | \n", "341300.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
4 | \n", "-122.25 | \n", "37.85 | \n", "52.0 | \n", "1627.0 | \n", "280.0 | \n", "565.0 | \n", "259.0 | \n", "3.8462 | \n", "342200.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
20635 | \n", "-121.09 | \n", "39.48 | \n", "25.0 | \n", "1665.0 | \n", "374.0 | \n", "845.0 | \n", "330.0 | \n", "1.5603 | \n", "78100.0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "
20636 | \n", "-121.21 | \n", "39.49 | \n", "18.0 | \n", "697.0 | \n", "150.0 | \n", "356.0 | \n", "114.0 | \n", "2.5568 | \n", "77100.0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "
20637 | \n", "-121.22 | \n", "39.43 | \n", "17.0 | \n", "2254.0 | \n", "485.0 | \n", "1007.0 | \n", "433.0 | \n", "1.7000 | \n", "92300.0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "
20638 | \n", "-121.32 | \n", "39.43 | \n", "18.0 | \n", "1860.0 | \n", "409.0 | \n", "741.0 | \n", "349.0 | \n", "1.8672 | \n", "84700.0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "
20639 | \n", "-121.24 | \n", "39.37 | \n", "16.0 | \n", "2785.0 | \n", "616.0 | \n", "1387.0 | \n", "530.0 | \n", "2.3886 | \n", "89400.0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "
19369 rows × 14 columns
\n", "\n", " | longitude | \n", "latitude | \n", "housing_median_age | \n", "total_rooms | \n", "total_bedrooms | \n", "population | \n", "households | \n", "median_income | \n", "expected_house_value | \n", "ocean_proximity_<1H OCEAN | \n", "ocean_proximity_INLAND | \n", "ocean_proximity_ISLAND | \n", "ocean_proximity_NEAR BAY | \n", "ocean_proximity_NEAR OCEAN | \n", "above_median | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "-122.23 | \n", "37.88 | \n", "41.0 | \n", "880.0 | \n", "129.0 | \n", "322.0 | \n", "126.0 | \n", "8.3252 | \n", "452600.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "True | \n", "
1 | \n", "-122.22 | \n", "37.86 | \n", "21.0 | \n", "7099.0 | \n", "1106.0 | \n", "2401.0 | \n", "1138.0 | \n", "8.3014 | \n", "358500.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "True | \n", "
2 | \n", "-122.24 | \n", "37.85 | \n", "52.0 | \n", "1467.0 | \n", "190.0 | \n", "496.0 | \n", "177.0 | \n", "7.2574 | \n", "352100.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "True | \n", "
3 | \n", "-122.25 | \n", "37.85 | \n", "52.0 | \n", "1274.0 | \n", "235.0 | \n", "558.0 | \n", "219.0 | \n", "5.6431 | \n", "341300.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "True | \n", "
4 | \n", "-122.25 | \n", "37.85 | \n", "52.0 | \n", "1627.0 | \n", "280.0 | \n", "565.0 | \n", "259.0 | \n", "3.8462 | \n", "342200.0 | \n", "0 | \n", "0 | \n", "0 | \n", "1 | \n", "0 | \n", "True | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
20635 | \n", "-121.09 | \n", "39.48 | \n", "25.0 | \n", "1665.0 | \n", "374.0 | \n", "845.0 | \n", "330.0 | \n", "1.5603 | \n", "78100.0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "False | \n", "
20636 | \n", "-121.21 | \n", "39.49 | \n", "18.0 | \n", "697.0 | \n", "150.0 | \n", "356.0 | \n", "114.0 | \n", "2.5568 | \n", "77100.0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "False | \n", "
20637 | \n", "-121.22 | \n", "39.43 | \n", "17.0 | \n", "2254.0 | \n", "485.0 | \n", "1007.0 | \n", "433.0 | \n", "1.7000 | \n", "92300.0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "False | \n", "
20638 | \n", "-121.32 | \n", "39.43 | \n", "18.0 | \n", "1860.0 | \n", "409.0 | \n", "741.0 | \n", "349.0 | \n", "1.8672 | \n", "84700.0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "False | \n", "
20639 | \n", "-121.24 | \n", "39.37 | \n", "16.0 | \n", "2785.0 | \n", "616.0 | \n", "1387.0 | \n", "530.0 | \n", "2.3886 | \n", "89400.0 | \n", "0 | \n", "1 | \n", "0 | \n", "0 | \n", "0 | \n", "False | \n", "
19369 rows × 15 columns
\n", "